{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "datasets.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true,
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/lmoroney/tfbook/blob/master/chapter4/datasets.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zX4Kg8DUTKWO",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "# you may not use this file except in compliance with the License.\n",
        "# You may obtain a copy of the License at\n",
        "#\n",
        "# https://www.apache.org/licenses/LICENSE-2.0\n",
        "#\n",
        "# Unless required by applicable law or agreed to in writing, software\n",
        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
        "# See the License for the specific language governing permissions and\n",
        "# limitations under the License."
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "coY1OkmCnT_8",
        "colab_type": "text"
      },
      "source": [
        "Run the next cell to make sure you are using TensorFlow 2.x."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "ioLbtB3uGKPX",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "c7bf0c0f-1939-405c-ba3a-c262e811997b"
      },
      "source": [
        "# Ask Colab to select TensorFlow 2.x. Any Exception raised while running\n",
        "# the magic is caught and ignored below (deliberate best effort).\n",
        "try:\n",
        "  # %tensorflow_version only exists in Colab.\n",
        "  %tensorflow_version 2.x\n",
        "except Exception:\n",
        "  pass"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "TensorFlow 2.x selected.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yUG2oehtYM5N",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# At time of creation, tfds was at version 2.0, which had bugs in some\n",
        "# common datasets, so this pins tfds to 2.1.0.\n",
        "# The explicit %pip magic is used instead of a bare `pip install` line so\n",
        "# the cell does not depend on IPython's automagic setting.\n",
        "%pip install tensorflow_datasets==2.1.0"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "QGWOsReCW451",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import tensorflow as tf\n",
        "import tensorflow_datasets as tfds\n",
        "\n",
        "# Show which tensorflow_datasets release is active in this runtime.\n",
        "print(tfds.__version__)\n",
        "\n",
        "# Load the cnn_dailymail dataset; with_info=True also returns its metadata.\n",
        "data, info = tfds.load(name=\"cnn_dailymail\", with_info=True)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "u2iz4LvVnXzj",
        "colab_type": "text"
      },
      "source": [
        "# First Example: Simple dataset\n",
        "Let's look at how to use a simple dataset like Fashion MNIST"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "8YSz4XjbQ0J1",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Install: pip install tensorflow-datasets\n",
        "import tensorflow as tf\n",
        "import tensorflow_datasets as tfds\n",
        "\n",
        "# Fetch the train and test splits as (images, labels) NumPy pairs;\n",
        "# batch_size=-1 asks tfds for each split as a single full batch.\n",
        "(training_images, training_labels), (test_images, test_labels) = tfds.as_numpy(\n",
        "    tfds.load(\n",
        "        'fashion_mnist',\n",
        "        split=['train', 'test'],\n",
        "        batch_size=-1,\n",
        "        as_supervised=True,\n",
        "    )\n",
        ")\n",
        "\n",
        "# Rescale pixel values by dividing by 255.\n",
        "training_images = training_images / 255.0\n",
        "test_images = test_images / 255.0\n",
        "\n",
        "# Flatten -> Dense(128, relu) -> Dropout(0.2) -> Dense(10, softmax).\n",
        "model = tf.keras.models.Sequential()\n",
        "model.add(tf.keras.layers.Flatten(input_shape=(28,28,1)))\n",
        "model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))\n",
        "model.add(tf.keras.layers.Dropout(0.2))\n",
        "model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))\n",
        "\n",
        "model.compile(\n",
        "    optimizer='adam',\n",
        "    loss='sparse_categorical_crossentropy',\n",
        "    metrics=['accuracy'],\n",
        ")\n",
        "\n",
        "# Train on the training split only.\n",
        "model.fit(training_images, training_labels, epochs=5)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "rt7Gz9ndnc5r",
        "colab_type": "text"
      },
      "source": [
        "Install TensorFlow Addons, whose image operations are used for\n",
        "augmentation in the next example."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "TL2RU3aiEzG2",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Install TensorFlow Addons (imported as `tfa` in later cells).\n",
        "# The explicit %pip magic is used instead of a bare `pip install` line so\n",
        "# the cell does not depend on IPython's automagic setting.\n",
        "%pip install tensorflow-addons"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "b4kEkLdcnkV6",
        "colab_type": "text"
      },
      "source": [
        "# Second Example: Horses or Humans with Validation\n",
        "This example trains on the Horses or Humans dataset loaded from TFDS\n",
        "and uses its test split as validation data."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iSq4t32ZHHpt",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import math\n",
        "\n",
        "import tensorflow as tf\n",
        "import tensorflow_datasets as tfds\n",
        "import tensorflow_addons as tfa\n",
        "\n",
        "# Largest rotation, in degrees, applied in either direction when augmenting.\n",
        "MAX_ROTATION_DEGREES = 40\n",
        "\n",
        "data = tfds.load('horses_or_humans', split='train', as_supervised=True)\n",
        "val_data = tfds.load('horses_or_humans', split='test', as_supervised=True)\n",
        "\n",
        "def normalizeimages(image, label):\n",
        "  \"\"\"Cast the image to float32 and divide by 255; the label is unchanged.\"\"\"\n",
        "  image = tf.cast(image, tf.float32)\n",
        "  image = (image/255)\n",
        "  return image, label\n",
        "\n",
        "def augmentimages(image, label):\n",
        "  \"\"\"Normalize an image, then randomly flip and rotate it for training.\n",
        "\n",
        "  tfa.image.rotate takes its angle in radians, so the angle is drawn\n",
        "  uniformly from +/-MAX_ROTATION_DEGREES and converted before use.\n",
        "  \"\"\"\n",
        "  image, label = normalizeimages(image, label)\n",
        "  image = tf.image.random_flip_left_right(image)\n",
        "  max_angle = math.radians(MAX_ROTATION_DEGREES)\n",
        "  angle = tf.random.uniform([], minval=-max_angle, maxval=max_angle)\n",
        "  image = tfa.image.rotate(image, angle, interpolation='NEAREST')\n",
        "  return image, label\n",
        "\n",
        "train = data.map(augmentimages)\n",
        "train_batches = train.shuffle(100).batch(32)\n",
        "# Validation images get the same cast and 1/255 scaling as the training\n",
        "# images, but no random flip or rotation, so both are on the same scale.\n",
        "validation_batches = val_data.map(normalizeimages).batch(32)\n",
        "\n",
        "model = tf.keras.models.Sequential([\n",
        "    tf.keras.layers.Conv2D(16, (3,3), activation='relu', \n",
        "                                      input_shape=(300, 300, 3)),\n",
        "    tf.keras.layers.MaxPooling2D(2, 2),\n",
        "    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Flatten(),\n",
        "    tf.keras.layers.Dense(512, activation='relu'),\n",
        "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
        "])\n",
        "\n",
        "\n",
        "model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
        "\n",
        "# validation_steps=1 evaluates a single validation batch per epoch;\n",
        "# remove it to validate on the whole split.\n",
        "history = model.fit(train_batches, epochs=10, validation_data=validation_batches, validation_steps=1)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zeJ2C3kznuvq",
        "colab_type": "text"
      },
      "source": [
        "# Third Example: Cats v Dogs with custom splits\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Mr4yTXWn_gFv",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 884
        },
        "outputId": "2f02abf3-8a1e-419f-d395-8099c2878418"
      },
      "source": [
        "import tensorflow as tf\n",
        "import tensorflow_datasets as tfds\n",
        "import tensorflow_addons as tfa\n",
        "import numpy as np\n",
        "\n",
        "\n",
        "def augmentimages(image, label):\n",
        "  \"\"\"Cast the image to float32, divide by 255 and resize it to 300x300.\n",
        "\n",
        "  Despite the name, nothing random is applied here, so the same function\n",
        "  is used to prepare both the training and the validation images.\n",
        "  \"\"\"\n",
        "  image = tf.cast(image, tf.float32)\n",
        "  image = (image/255)\n",
        "  image = tf.image.resize(image,(300,300))\n",
        "  return image, label\n",
        "\n",
        "# cats_vs_dogs is loaded here from its 'train' split only, so custom\n",
        "# train / validation / test splits are sliced out of it: 80% / 10% / 10%.\n",
        "# count_data is the full split, used by the optional size check below.\n",
        "count_data = tfds.load('cats_vs_dogs',split='train',as_supervised=True)\n",
        "train_data = tfds.load('cats_vs_dogs', split='train[:80%]', as_supervised=True)\n",
        "validation_data = tfds.load('cats_vs_dogs', split='train[80%:90%]', as_supervised=True)\n",
        "test_data = tfds.load('cats_vs_dogs', split='train[-10%:]', as_supervised=True)\n",
        "\n",
        "\n",
        "# Optional: uncomment to check the size of each split. Every count\n",
        "# iterates over the whole split, so this is slow.\n",
        "#count_length = [i for i,_ in enumerate(count_data)][-1] + 1\n",
        "#print(count_length)\n",
        "\n",
        "#train_length = [i for i,_ in enumerate(train_data)][-1] + 1\n",
        "#print(train_length)\n",
        "\n",
        "#validation_length = [i for i,_ in enumerate(validation_data)][-1] + 1\n",
        "#print(validation_length)\n",
        "\n",
        "#test_length = [i for i,_ in enumerate(test_data)][-1] + 1\n",
        "#print(test_length)\n",
        "\n",
        "augmented_training_data = train_data.map(augmentimages)\n",
        "train_batches = augmented_training_data.shuffle(1024).batch(32)\n",
        "\n",
        "# Prepare the held-out validation split the same way (no shuffling needed)\n",
        "# so fit() reports validation metrics next to the training metrics.\n",
        "augmented_validation_data = validation_data.map(augmentimages)\n",
        "validation_batches = augmented_validation_data.batch(32)\n",
        "\n",
        "model = tf.keras.models.Sequential([\n",
        "    tf.keras.layers.Conv2D(16, (3,3), activation='relu', \n",
        "                                      input_shape=(300, 300, 3)),\n",
        "    tf.keras.layers.MaxPooling2D(2, 2),\n",
        "    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Flatten(),\n",
        "    tf.keras.layers.Dense(512, activation='relu'),\n",
        "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
        "])\n",
        "\n",
        "\n",
        "model.compile(optimizer='Adam',\n",
        "              loss='binary_crossentropy',\n",
        "              metrics=['accuracy'])\n",
        "\n",
        "history = model.fit(train_batches, epochs=25, validation_data=validation_batches)"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Epoch 1/25\n",
            "582/582 [==============================] - 65s 111ms/step - loss: 0.6373 - accuracy: 0.6249\n",
            "Epoch 2/25\n",
            "582/582 [==============================] - 59s 101ms/step - loss: 0.5163 - accuracy: 0.7395\n",
            "Epoch 3/25\n",
            "582/582 [==============================] - 64s 109ms/step - loss: 0.4232 - accuracy: 0.8045\n",
            "Epoch 4/25\n",
            "582/582 [==============================] - 64s 109ms/step - loss: 0.3512 - accuracy: 0.8434\n",
            "Epoch 5/25\n",
            "582/582 [==============================] - 58s 100ms/step - loss: 0.2903 - accuracy: 0.8760\n",
            "Epoch 6/25\n",
            "582/582 [==============================] - 59s 101ms/step - loss: 0.2264 - accuracy: 0.9050\n",
            "Epoch 7/25\n",
            "582/582 [==============================] - 64s 110ms/step - loss: 0.1766 - accuracy: 0.9286\n",
            "Epoch 8/25\n",
            "582/582 [==============================] - 59s 101ms/step - loss: 0.1197 - accuracy: 0.9523\n",
            "Epoch 9/25\n",
            "582/582 [==============================] - 63s 109ms/step - loss: 0.0843 - accuracy: 0.9671\n",
            "Epoch 10/25\n",
            "582/582 [==============================] - 59s 101ms/step - loss: 0.0633 - accuracy: 0.9761\n",
            "Epoch 11/25\n",
            "582/582 [==============================] - 59s 101ms/step - loss: 0.0586 - accuracy: 0.9801\n",
            "Epoch 12/25\n",
            "582/582 [==============================] - 59s 101ms/step - loss: 0.0430 - accuracy: 0.9841\n",
            "Epoch 13/25\n",
            "582/582 [==============================] - 63s 108ms/step - loss: 0.0388 - accuracy: 0.9879\n",
            "Epoch 14/25\n",
            "582/582 [==============================] - 63s 109ms/step - loss: 0.0439 - accuracy: 0.9858\n",
            "Epoch 15/25\n",
            "582/582 [==============================] - 58s 100ms/step - loss: 0.0269 - accuracy: 0.9903\n",
            "Epoch 16/25\n",
            "582/582 [==============================] - 63s 108ms/step - loss: 0.0373 - accuracy: 0.9881\n",
            "Epoch 17/25\n",
            "582/582 [==============================] - 59s 101ms/step - loss: 0.0312 - accuracy: 0.9896\n",
            "Epoch 18/25\n",
            "582/582 [==============================] - 63s 108ms/step - loss: 0.0280 - accuracy: 0.9910\n",
            "Epoch 19/25\n",
            "582/582 [==============================] - 63s 107ms/step - loss: 0.0275 - accuracy: 0.9918\n",
            "Epoch 20/25\n",
            "582/582 [==============================] - 58s 99ms/step - loss: 0.0226 - accuracy: 0.9929\n",
            "Epoch 21/25\n",
            "582/582 [==============================] - 58s 100ms/step - loss: 0.0343 - accuracy: 0.9888\n",
            "Epoch 22/25\n",
            "582/582 [==============================] - 63s 109ms/step - loss: 0.0253 - accuracy: 0.9913\n",
            "Epoch 23/25\n",
            "582/582 [==============================] - 63s 108ms/step - loss: 0.0211 - accuracy: 0.9928\n",
            "Epoch 24/25\n",
            "582/582 [==============================] - 58s 100ms/step - loss: 0.0251 - accuracy: 0.9919\n",
            "Epoch 25/25\n",
            "582/582 [==============================] - 58s 100ms/step - loss: 0.0315 - accuracy: 0.9922\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}
